# ITGC Testing & Risk Assessment Demo App for Indian Banks (SBI, Kotak, Axis, ICICI, etc.)
# ------------------------------------------------------------------------------------
# How to run (after installing dependencies):
#   1) Save this file as app.py
#   2) Create a new conda env (optional): conda create -n itgc-demo python=3.11
#   3) conda activate itgc-demo
#   4) pip install -r requirements.txt   (or)   pip install streamlit pandas numpy matplotlib python-docx xlsxwriter openpyxl pydantic
#   5) streamlit run app.py
#
# This app generates: (a) Test Program (DOCX), (b) Final Workpaper (Excel), (c) Findings & Recommendations (Excel & on-screen),
# and (d) a ZIP bundle for demo. It also produces synthetic test data to showcase the workflow.

from __future__ import annotations
import io
import zipfile
from datetime import datetime, date
from typing import List, Dict, Any

import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
from pydantic import BaseModel, Field, validator
from docx import Document
from docx.shared import Pt

# -------------------------
# Domain Model
# -------------------------
# The ITGC domains in scope. These strings are also the keys of
# FINDING_TEMPLATES and the values expected in a controls list's "Domain" column.
ITGC_DOMAINS = [
    "Access Management",
    "Change Management",
    "Backup & Recovery",
    "Disaster Recovery",
    "Incident Management",
    "Audit Trail / Logging",
]

# Built-in control template used when no controls file is uploaded.
# The column names double as pydantic field aliases in ControlAssessment.
DEFAULT_CONTROLS = [
    {
        "Control ID": "AC-01",
        "Domain": "Access Management",
        "Control Name": "User provisioning requires documented approval",
        "Control Objective": "Ensure only authorized users get access to banking systems",
        "Frequency": "Ongoing",
        "Control Owner": "IT Security",
        "Financial Statement Impact": "High",
    },
    {
        "Control ID": "AC-02",
        "Domain": "Access Management",
        "Control Name": "Periodic user access review (quarterly)",
        "Control Objective": "Timely removal of inappropriate access",
        "Frequency": "Quarterly",
        "Control Owner": "IT Security",
        "Financial Statement Impact": "High",
    },
    {
        "Control ID": "CM-01",
        "Domain": "Change Management",
        "Control Name": "Changes are raised via ticketing tool and approved",
        "Control Objective": "Ensure changes are authorized, tested and documented",
        "Frequency": "Per Change",
        "Control Owner": "IT Apps",
        "Financial Statement Impact": "Medium",
    },
    {
        "Control ID": "BK-01",
        "Domain": "Backup & Recovery",
        "Control Name": "Daily backups with periodic restore testing",
        "Control Objective": "Ensure recoverability of critical systems/data",
        "Frequency": "Daily/Quarterly",
        "Control Owner": "IT Infra",
        "Financial Statement Impact": "High",
    },
    {
        "Control ID": "DR-01",
        "Domain": "Disaster Recovery",
        "Control Name": "Annual DR drill with RPO/RTO defined and met",
        "Control Objective": "Ensure resilience and continuity",
        "Frequency": "Annual",
        "Control Owner": "IT BCM",
        "Financial Statement Impact": "High",
    },
    {
        "Control ID": "IM-01",
        "Domain": "Incident Management",
        "Control Name": "Incidents logged, prioritized, RCA and closure approval",
        "Control Objective": "Ensure timely resolution and prevention of recurrence",
        "Frequency": "Ongoing",
        "Control Owner": "IT Ops",
        "Financial Statement Impact": "Medium",
    },
    {
        "Control ID": "AT-01",
        "Domain": "Audit Trail / Logging",
        "Control Name": "Critical systems maintain immutable logs; daily monitoring",
        "Control Objective": "Detect unauthorized or anomalous activity",
        "Frequency": "Daily",
        "Control Owner": "SOC",
        "Financial Statement Impact": "High",
    },
]

# Per-bank multiplier applied to the raw risk score in compute_risk()
# (larger institutions get a slightly higher weighting).
BANK_PROFILES = {
    "SBI": {"scale": "Very Large", "risk_modifier": 1.1},
    "ICICI": {"scale": "Large", "risk_modifier": 1.05},
    "HDFC": {"scale": "Large", "risk_modifier": 1.05},
    "Axis": {"scale": "Large", "risk_modifier": 1.03},
    "Kotak": {"scale": "Medium-Large", "risk_modifier": 1.02},
    "Other": {"scale": "Varies", "risk_modifier": 1.00},
}

# Rank order for severity labels.
# NOTE(review): not referenced anywhere in this file — confirm it is used elsewhere or remove.
SEVERITY_ORDER = {"Low": 1, "Medium": 2, "High": 3, "Critical": 4}

class ControlAssessment(BaseModel):
    """One control's testing inputs, built from a row of the editable grid.

    The first three fields are populated by alias (spreadsheet-style column
    headers); the rest are passed by field name.
    NOTE(review): `validator` is the pydantic v1 API — deprecated under v2;
    migrate to `field_validator` if the environment pins pydantic>=2.
    """
    control_id: str = Field(..., alias="Control ID")
    domain: str = Field(..., alias="Domain")
    control_name: str = Field(..., alias="Control Name")
    design_effective: str = Field("Yes")  # Yes/Partially/No
    operating_effective: str = Field("Yes")  # Yes/Partially/No
    population_size: int = Field(0)  # total items in the control's population
    sample_size: int = Field(0)  # items selected for testing
    exceptions_found: int = Field(0)  # exceptions noted within the sample
    notes: str = Field("")

    @validator("design_effective", "operating_effective")
    def _validate_ynp(cls, v: str) -> str:
        """Restrict effectiveness ratings to the three allowed answers."""
        allowed = {"Yes", "Partially", "No"}
        if v not in allowed:
            raise ValueError(f"Value must be one of {allowed}")
        return v

# -------------------------
# Synthetic Test Data Generators (for demo)
# -------------------------
# Seed the module-level RNG once so the demo datasets are reproducible per run.
np.random.seed(42)

def gen_access_logs(n=200):
    """Synthesize `n` access-provisioning log rows (user, system, approval status)."""
    user_pool = [f"user{i:03d}" for i in range(1, 51)]
    system_pool = ["CoreBanking", "Payments", "Treasury", "GL", "InternetBanking"]
    # ~22% of requests deliberately lack a proper approval so the test finds exceptions.
    approval_col = np.random.choice(["Approved", "Missing", "EmailOnly"], size=n, p=[0.78, 0.12, 0.10])
    columns = {
        "user": np.random.choice(user_pool, n),
        "system": np.random.choice(system_pool, n),
        "request_date": pd.date_range(end=pd.Timestamp.today(), periods=n).date,
        "approval": approval_col,
        "provisioned_by": np.random.choice(["IAM-01", "IAM-02", "Helpdesk"], n),
    }
    return pd.DataFrame(columns)

def gen_change_tickets(n=120):
    """Synthesize `n` change tickets with approval and testing-evidence attributes."""
    status_pool = ["Approved", "Implemented", "Rollback", "Rework"]
    # ~5% of changes have no approver and ~25% lack UAT evidence, on purpose.
    approver_col = np.random.choice(["ChangeMgr", "AppOwner", "CAB", "None"], n, p=[0.4,0.35,0.2,0.05])
    testing_col = np.random.choice(["UAT Evidence", "Missing UAT", "Peer Review"], n, p=[0.6,0.25,0.15])
    ticket_ids = [f"CHG{i:05d}" for i in range(1, n+1)]
    columns = {
        "ticket_id": ticket_ids,
        "status": np.random.choice(status_pool, n),
        "approval": approver_col,
        "testing": testing_col,
        "prod_date": pd.date_range(end=pd.Timestamp.today(), periods=n).date,
    }
    return pd.DataFrame(columns)

def gen_backup_logs(n=90):
    """Synthesize `n` daily backup job records, including failures and restore tests."""
    job_ids = [f"BKUP{i:04d}" for i in range(1, n+1)]
    # ~10% of jobs do not fully succeed; ~30% of restore tests fail or were skipped.
    status_col = np.random.choice(["Success", "Failed", "Partial"], n, p=[0.9,0.06,0.04])
    restore_col = np.random.choice(["Pass", "Fail", "NotRun"], n, p=[0.7,0.1,0.2])
    columns = {
        "job": job_ids,
        "date": pd.date_range(end=pd.Timestamp.today(), periods=n).date,
        "status": status_col,
        "restore_test": restore_col,
    }
    return pd.DataFrame(columns)

def gen_incidents(n=80):
    """Synthesize `n` incident records with closure timeliness and RCA evidence."""
    severity_levels = ["Low", "Medium", "High", "Critical"]
    # ~40% of incidents are closed late or without an RCA so findings surface.
    closure_col = np.random.choice(["OnTime", "Delayed", "NoRCA"], n, p=[0.6,0.3,0.1])
    columns = {
        "incident_id": [f"INC{i:05d}" for i in range(1, n+1)],
        "severity": np.random.choice(severity_levels, n, p=[0.4,0.35,0.2,0.05]),
        "closure": closure_col,
        "rca": np.random.choice(["Provided", "Missing"], n, p=[0.8,0.2]),
    }
    return pd.DataFrame(columns)

def gen_dr_drills(n=6):
    """Synthesize `n` DR drill results with RPO/RTO attainment flags and issue counts."""
    # Roughly one in four/five drills misses its RPO or RTO target.
    rpo_col = np.random.choice(["Yes", "No"], n, p=[0.8,0.2])
    rto_col = np.random.choice(["Yes", "No"], n, p=[0.75,0.25])
    columns = {
        "drill_date": pd.date_range(end=pd.Timestamp.today(), periods=n).date,
        "scope": np.random.choice(["CoreBanking", "Payments", "GL"], n),
        "RPO_met": rpo_col,
        "RTO_met": rto_col,
        "issues": np.random.randint(0,5,size=n),
    }
    return pd.DataFrame(columns)

def gen_audit_trail(n=300):
    """Synthesize `n` SIEM audit-trail events; ~10% arrive pre-flagged for review."""
    event_types = ["Login", "PrivEsc", "ConfigChange", "FailedLogin", "DataExport"]
    flagged_col = np.random.choice([0,1], n, p=[0.9,0.1])
    columns = {
        "event_ts": pd.date_range(end=pd.Timestamp.today(), periods=n),
        "system": np.random.choice(["CoreBanking","Payments","InternetBanking"], n),
        "event": np.random.choice(event_types, n),
        "user": [f"user{i:03d}" for i in np.random.randint(1, 51, size=n)],
        "flagged": flagged_col,
    }
    return pd.DataFrame(columns)

# -------------------------
# Scoring & Findings Rules
# -------------------------

def compute_risk(control: ControlAssessment, bank_modifier: float) -> Dict[str, Any]:
    """Convert effectiveness ratings plus exception rate into a score and level.

    Design and operating ratings each contribute 0/1/2 points
    (Yes/Partially/No), exceptions add 0-2 points of "pressure", and the
    subtotal is scaled by the bank-size modifier before bucketing into
    Low/Medium/High/Critical.
    """
    points = {"Yes": 0, "Partially": 1, "No": 2}
    base = points[control.design_effective] + points[control.operating_effective]

    # Exception pressure: none -> 0, up to 10% of the sample -> 1, above -> 2.
    if control.sample_size == 0:
        exc_rate = 0
    else:
        exc_rate = control.exceptions_found / max(1, control.sample_size)
    if exc_rate == 0:
        pressure = 0
    elif exc_rate <= 0.1:
        pressure = 1
    else:
        pressure = 2

    score = (base + pressure) * bank_modifier
    # Bucket the scaled score; anything >= 3.1 is Critical.
    for ceiling, label in ((1.1, "Low"), (2.1, "Medium"), (3.1, "High")):
        if score < ceiling:
            level = label
            break
    else:
        level = "Critical"
    return {"score": round(float(score), 2), "level": level}

# Boilerplate five-attribute findings (condition / criteria / cause / effect /
# recommendation) per domain, used to pre-fill the findings table. Keys must
# match ITGC_DOMAINS; tailor the wording before issuing a real report.
FINDING_TEMPLATES = {
    "Access Management": {
        "condition": "Missing approvals observed in provisioning logs",
        "criteria": "Access should be granted only with documented approvals",
        "cause": "Gaps in IAM workflow enforcement",
        "effect": "Unauthorized access risk to critical systems",
        "recommendation": "Configure mandatory approval in IAM tool; periodic QC review",
    },
    "Change Management": {
        "condition": "Changes moved to prod with missing UAT/CAB",
        "criteria": "All changes require documented testing and approvals",
        "cause": "Ticket workflow not enforced",
        "effect": "Risk of unstable changes impacting financial reporting",
        "recommendation": "Enforce CAB gates and evidence attachment in tool",
    },
    "Backup & Recovery": {
        "condition": "Backup failures and restore tests not run",
        "criteria": "Daily backups must succeed; periodic restore tests required",
        "cause": "Monitoring gaps and lack of schedule compliance",
        "effect": "Data loss and delayed recovery",
        "recommendation": "Enable alerts; track restore tests; escalate failures",
    },
    "Disaster Recovery": {
        "condition": "RPO/RTO not met in recent DR drill",
        "criteria": "RPO/RTO targets must be achieved",
        "cause": "Incomplete DR readiness",
        "effect": "Extended outage and regulatory impact",
        "recommendation": "Update runbooks; remediate bottlenecks; retest",
    },
    "Incident Management": {
        "condition": "High severity incidents closed late without RCA",
        "criteria": "Major incidents require timely resolution and RCA",
        "cause": "Process adherence issues",
        "effect": "Recurring outages and customer impact",
        "recommendation": "Institute RCA SLA and governance reviews",
    },
    "Audit Trail / Logging": {
        "condition": "Flagged security events lack review/evidence",
        "criteria": "Security logs must be monitored and investigated",
        "cause": "SOC capacity/process gaps",
        "effect": "Undetected malicious activity",
        "recommendation": "Automate alert triage; evidence retention and review",
    },
}

# -------------------------
# Documents Builders
# -------------------------

def build_test_program_doc(bank: str, period: str, controls_df: pd.DataFrame) -> bytes:
    """Render the ITGC test program as a DOCX byte string ready for download.

    One level-2 section per control row, with the domain-specific test steps
    from test_steps_for_domain() numbered beneath it.
    """
    doc = Document()
    normal_style = doc.styles['Normal']
    normal_style.font.name = 'Calibri'
    normal_style.font.size = Pt(11)

    doc.add_heading(f"ITGC Test Program - {bank}", level=1)
    doc.add_paragraph(f"Period: {period}")
    doc.add_paragraph("Scope includes Access, Change, Backup, DR, Incident, and Audit Trail domains.")

    for _, control in controls_df.iterrows():
        doc.add_heading(f"{control['Control ID']} - {control['Control Name']}", level=2)
        doc.add_paragraph(f"Domain: {control['Domain']}")
        doc.add_paragraph(f"Objective: {control['Control Objective']}")
        doc.add_paragraph("Test Steps:")
        for step_no, step in enumerate(test_steps_for_domain(control['Domain']), 1):
            doc.add_paragraph(f"{step_no}. {step}", style=None)
    out = io.BytesIO()
    doc.save(out)
    return out.getvalue()


def test_steps_for_domain(domain: str) -> List[str]:
    if domain == "Access Management":
        return [
            "Obtain population of access requests; select sample",
            "Inspect approvals and SoD checks",
            "Verify timely provisioning/de-provisioning",
            "Trace to system logs",
        ]
    if domain == "Change Management":
        return [
            "Obtain change population from ticketing tool",
            "Validate approvals (App Owner/CAB)",
            "Inspect UAT/peer review evidence",
            "Verify implementation and post-deployment checks",
        ]
    if domain == "Backup & Recovery":
        return [
            "Obtain backup job dashboard and logs",
            "Evaluate failures and retry evidence",
            "Inspect restore test results",
        ]
    if domain == "Disaster Recovery":
        return [
            "Obtain DR plan and last drill report",
            "Assess RPO/RTO vs target",
            "Review remediation of prior issues",
        ]
    if domain == "Incident Management":
        return [
            "Obtain incident register; sample high severity",
            "Verify RCA, approvals, and closure timeliness",
            "Check preventative actions tracking",
        ]
    if domain == "Audit Trail / Logging":
        return [
            "Obtain SIEM/alarm report for scope period",
            "Sample flagged events; verify investigation evidence",
            "Assess log retention and integrity",
        ]
    return ["Agree scope and tailor test steps to system landscape"]


def build_workpaper_excel(bank: str, period: str, assessments: pd.DataFrame, findings: pd.DataFrame) -> bytes:
    """Assemble the four-tab demo workpaper (Cover/Assessments/Findings/Readme) as xlsx bytes."""
    out = io.BytesIO()
    with pd.ExcelWriter(out, engine="xlsxwriter") as xl:
        # Cover tab: bank, period, and preparation timestamp.
        cover_rows = {
            "Field": ["Bank", "Period", "Prepared On"],
            "Value": [bank, period, datetime.now().strftime("%Y-%m-%d %H:%M")],
        }
        pd.DataFrame(cover_rows).to_excel(xl, index=False, sheet_name="Cover")

        # Detail tabs come straight from the computed frames.
        assessments.to_excel(xl, index=False, sheet_name="Assessments")
        findings.to_excel(xl, index=False, sheet_name="Findings")

        # Readme tab for reviewers unfamiliar with the workpaper layout.
        readme_rows = {
            "Item": ["Purpose", "How to Use"],
            "Details": [
                "Demo workpaper for ITGC testing",
                "Review Assessments and Findings tabs; link evidence where applicable",
            ],
        }
        pd.DataFrame(readme_rows).to_excel(xl, index=False, sheet_name="Readme")
    return out.getvalue()

# -------------------------
# Streamlit UI
# -------------------------

st.set_page_config(page_title="ITGC Testing & Risk Assessment Demo", layout="wide")

st.title("ITGC Testing & Risk Assessment – Demo App")
with st.sidebar:
    st.header("Setup")
    # Bank choice selects the BANK_PROFILES risk modifier used during scoring.
    bank = st.selectbox("Bank", list(BANK_PROFILES.keys()), index=0)
    period = st.text_input("Audit Period (e.g., FY 2024-25)", value="FY 2024-25")

    st.markdown("""
    **Controls Input**  
    Upload a controls list (Excel/CSV) with columns: Control ID, Domain, Control Name, Control Objective, Frequency, Control Owner, Financial Statement Impact.  
    Or use the template below.
    """)

    controls_file = st.file_uploader("Upload Controls (xlsx/csv)", type=["xlsx","csv"])
    use_template = st.checkbox("Use built-in control template", value=True)

    st.markdown("---")
    st.subheader("Synthetic Test Data")
    gen_data = st.checkbox("Generate demo datasets", value=True)

# An uploaded file always wins over the template; format is inferred from the
# file extension (csv vs anything-else-treated-as-Excel).
if controls_file is not None:
    if controls_file.name.endswith(".csv"):
        controls_df = pd.read_csv(controls_file)
    else:
        controls_df = pd.read_excel(controls_file)
else:
    # No upload: either the bundled template or an empty frame with the expected columns.
    controls_df = pd.DataFrame(DEFAULT_CONTROLS) if use_template else pd.DataFrame(columns=[
        "Control ID","Domain","Control Name","Control Objective","Frequency","Control Owner","Financial Statement Impact"
    ])

# Display Controls
st.subheader("Controls in Scope")
st.dataframe(controls_df, use_container_width=True)

# Demo datasets
# These module-level frames are consumed by quick_indicators() and the ZIP
# bundle below; None disables the corresponding checks and exports.
if gen_data:
    access_df = gen_access_logs()
    change_df = gen_change_tickets()
    backup_df = gen_backup_logs()
    incident_df = gen_incidents()
    dr_df = gen_dr_drills()
    audittrail_df = gen_audit_trail()
else:
    access_df = change_df = backup_df = incident_df = dr_df = audittrail_df = None

# Quick analytics & potential issues
def quick_indicators(access=None, change=None, backup=None, dr=None, incident=None, audit=None):
    """Scan evidence datasets for red flags exceeding the demo thresholds.

    Each argument is an optional DataFrame. When left as None the module-level
    demo frame for that domain (e.g. ``access_df``) is used, so existing
    zero-argument call sites behave exactly as before; passing frames
    explicitly decouples the function from module globals and makes it
    testable and reusable.

    Returns a list of (domain, message) tuples, one per breached threshold.
    """
    # Lazy fallback to the module-level demo frames (these may themselves be
    # None when dataset generation is disabled in the sidebar).
    access = access_df if access is None else access
    change = change_df if change is None else change
    backup = backup_df if backup is None else backup
    dr = dr_df if dr is None else dr
    incident = incident_df if incident is None else incident
    audit = audittrail_df if audit is None else audit

    issues = []
    if access is not None:
        # >5% of provisioning rows without a documented approval is reportable.
        missing = (access['approval'] == 'Missing').mean()
        if missing > 0.05:
            issues.append(("Access Management", f"{missing:.1%} provisioning without approvals"))
    if change is not None:
        # >10% of changes missing UAT evidence.
        miss_uat = (change['testing'] == 'Missing UAT').mean()
        if miss_uat > 0.1:
            issues.append(("Change Management", f"{miss_uat:.1%} changes with Missing UAT"))
    if backup is not None:
        # >8% of backup jobs not fully successful (Failed or Partial).
        failures = (backup['status'] != 'Success').mean()
        if failures > 0.08:
            issues.append(("Backup & Recovery", f"{failures:.1%} backup failures/partials"))
    if dr is not None:
        # >20% of drills missing either RPO or RTO.
        not_met = ((dr['RPO_met'] == 'No') | (dr['RTO_met'] == 'No')).mean()
        if not_met > 0.2:
            issues.append(("Disaster Recovery", f"{not_met:.1%} DR drills missed targets"))
    if incident is not None:
        # >25% of incidents closed late.
        delayed = (incident['closure'] == 'Delayed').mean()
        if delayed > 0.25:
            issues.append(("Incident Management", f"{delayed:.1%} incidents closed delayed"))
    if audit is not None:
        # >12% of events flagged by the SIEM.
        flagged = (audit['flagged'] == 1).mean()
        if flagged > 0.12:
            issues.append(("Audit Trail / Logging", f"{flagged:.1%} events flagged by SIEM"))
    return issues

issues = quick_indicators()

# Risk assessment grid (user-editable effectiveness & sampling)
st.subheader("Risk Assessment & Testing Inputs")

# Nothing to assess without a controls list; halt this script run here.
if len(controls_df) == 0:
    st.info("Upload or select a controls list to continue.")
    st.stop()

with st.expander("Enter assessment details per control"):
    # Seed the grid with optimistic defaults; the editor lets the tester
    # override effectiveness ratings and sampling counts per control.
    edited_df = controls_df.copy()
    edited_df["Design Effective"] = "Yes"
    edited_df["Operating Effective"] = "Yes"
    edited_df["Population"] = 25
    edited_df["Sample"] = 10
    edited_df["Exceptions"] = 0
    edited_df = st.data_editor(
        edited_df,
        hide_index=True,
        use_container_width=True,
        num_rows="dynamic",
        column_config={
            "Design Effective": st.column_config.SelectboxColumn(options=["Yes","Partially","No"]),
            "Operating Effective": st.column_config.SelectboxColumn(options=["Yes","Partially","No"]),
        },
    )

# Compute risk
# Unknown bank names degrade gracefully to a neutral 1.0 modifier.
modifier = BANK_PROFILES.get(bank, {"risk_modifier": 1.0})["risk_modifier"]
assessments = []
for _, r in edited_df.iterrows():
    # Round-trip each row through the pydantic model so ratings and counts are validated.
    ca = ControlAssessment(**{
        "Control ID": r["Control ID"],
        "Domain": r["Domain"],
        "Control Name": r["Control Name"],
        "design_effective": r["Design Effective"],
        "operating_effective": r["Operating Effective"],
        "population_size": int(r["Population"]),
        "sample_size": int(r["Sample"]),
        "exceptions_found": int(r["Exceptions"]),
    })
    score = compute_risk(ca, modifier)
    assessments.append({
        **r.to_dict(),
        "Risk Score": score["score"],
        "Risk Level": score["level"],
    })
assess_df = pd.DataFrame(assessments)

st.subheader("Risk Heatmap")
# Heatmap by Domain average score
# Rendered as a single-column imshow: one row per domain, color = mean score.
heat = assess_df.groupby("Domain")["Risk Score"].mean().reset_index()
fig = plt.figure()
plt.imshow(heat[["Risk Score"]].values, aspect="auto")
plt.xticks([0],["Risk Score"])  # Single column
plt.yticks(range(len(heat)), heat["Domain"].tolist())
plt.colorbar()
st.pyplot(fig, use_container_width=True)

# Findings synthesis from issues and high risk controls
findings_rows = []
for dom, msg in issues:
    t = FINDING_TEMPLATES[dom]
    findings_rows.append({
        "Domain": dom,
        "Condition": msg if msg else t["condition"],
        "Criteria": t["criteria"],
        "Cause": t["cause"],
        "Effect": t["effect"],
        "Recommendation": t["recommendation"],
        "Severity": "High" if "Critical" not in msg else "Critical",
    })
# High risk controls with no detected issue still warrant observation
high_controls = assess_df[assess_df["Risk Level"].isin(["High","Critical"])]
for _, hr in high_controls.iterrows():
    dom = hr["Domain"]
    t = FINDING_TEMPLATES.get(dom, list(FINDING_TEMPLATES.values())[0])
    findings_rows.append({
        "Domain": dom,
        "Condition": f"Elevated risk score for {hr['Control ID']} – {hr['Control Name']}",
        "Criteria": t["criteria"],
        "Cause": t["cause"],
        "Effect": t["effect"],
        "Recommendation": t["recommendation"],
        "Severity": "High",
    })

findings_df = pd.DataFrame(findings_rows).drop_duplicates()

st.subheader("Findings & Recommendations")
st.dataframe(findings_df, use_container_width=True)

# Build documents
st.subheader("Deliverables")
col1, col2, col3, col4 = st.columns(4)
with col1:
    if st.button("Generate Test Program (DOCX)"):
        doc_bytes = build_test_program_doc(bank, period, controls_df)
        st.download_button(
            "Download Test Program.docx",
            data=doc_bytes, file_name=f"ITGC_Test_Program_{bank}.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
with col2:
    if st.button("Generate Workpaper (Excel)"):
        xls_bytes = build_workpaper_excel(bank, period, assess_df, findings_df)
        st.download_button(
            "Download Workpaper.xlsx",
            data=xls_bytes, file_name=f"ITGC_Workpaper_{bank}.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
with col3:
    csv = findings_df.to_csv(index=False).encode()
    st.download_button("Download Findings (CSV)", data=csv, file_name=f"ITGC_Findings_{bank}.csv", mime="text/csv")
with col4:
    if st.button("Bundle ZIP (all)"):
        zbuf = io.BytesIO()
        with zipfile.ZipFile(zbuf, 'w', compression=zipfile.ZIP_DEFLATED) as zf:
            # docs
            zf.writestr(f"ITGC_Test_Program_{bank}.docx", build_test_program_doc(bank, period, controls_df))
            zf.writestr(f"ITGC_Workpaper_{bank}.xlsx", build_workpaper_excel(bank, period, assess_df, findings_df))
            zf.writestr(f"ITGC_Findings_{bank}.csv", findings_df.to_csv(index=False))
            # data samples
            if gen_data:
                zf.writestr("data/access_logs.csv", access_df.to_csv(index=False))
                zf.writestr("data/change_tickets.csv", change_df.to_csv(index=False))
                zf.writestr("data/backup_logs.csv", backup_df.to_csv(index=False))
                zf.writestr("data/incidents.csv", incident_df.to_csv(index=False))
                zf.writestr("data/dr_drills.csv", dr_df.to_csv(index=False))
                zf.writestr("data/audittrail.csv", audittrail_df.to_csv(index=False))
        st.download_button("Download Demo Bundle.zip", data=zbuf.getvalue(), file_name=f"ITGC_Demo_{bank}.zip", mime="application/zip")

# Guidance panel
# Static methodology notes shown below the deliverables section.
with st.expander("Methodology Notes (for demo)"):
    st.markdown(
        """
        - Domains align to common ITGC areas used across Indian banks.
        - Risk is computed from design/operating effectiveness and exception pressure with a bank size modifier.
        - Findings templates are generic; tailor to the bank’s policy, regulator expectations (e.g., RBI), and system stack.
        - Replace synthetic datasets with real evidence exports to run a live walkthrough.
        """
    )

st.success("Demo ready. Adjust inputs, review heatmap, and generate deliverables.")